package com.itextpdf.text.pdf.parser;
import com.itextpdf.testutils.TestResourceUtils;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.List;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import org.junit.Assert;
import org.junit.Test;
import com.itextpdf.text.Document;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;
/**
 * Tests for {@link TextRenderInfo}: continuity of per-character baselines,
 * extraction of text containing characters that used to trigger an
 * ArrayIndexOutOfBoundsException, and baseline coordinates for text set in a
 * Type 3 font.
 */
public class TextRenderInfoTest {

    /** Page number of the first page of a document (pages are addressed starting at 1 here). */
    public static final int FIRST_PAGE = 1;

    /** Index of the first element of a list. */
    public static final int FIRST_ELEMENT_INDEX = 0;

    /**
     * Renders a one-word PDF and checks, for every text chunk, that each
     * character's baseline starts where the previous character's baseline ends.
     * The assertions themselves live in {@link CharacterPositionRenderListener}.
     *
     * @throws Exception if the PDF cannot be created or parsed
     */
    @Test
    public void testCharacterRenderInfos() throws Exception {
        byte[] bytes = createSimplePdf(PageSize.LETTER.rotate().rotate(), "ABCD");
        // Debugging aid: uncomment to inspect the generated PDF on disk.
        //TestResourceUtils.saveBytesToFile(bytes, new File("C:/temp/out.pdf"));

        PdfReader r = new PdfReader(bytes);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(r);
            parser.processContent(FIRST_PAGE, new CharacterPositionRenderListener());
        } finally {
            // Release the reader even when an assertion inside the listener fails.
            r.close();
        }
    }

    /**
     * Test introduced to exclude a bug related to a Unicode quirk for
     * Japanese. TextRenderInfo threw an AIOOBE for some characters.
     * The first extracted line of the resource PDF is compared against the
     * expected Japanese sentence.
     *
     * @throws java.lang.Exception if the resource cannot be read or parsed
     * @since 5.5.5-SNAPSHOT
     */
    @Test
    public void testUnicodeEmptyString() throws Exception {
        String inFile = "japanese_text.pdf";
        String origText =
                  "\u76f4\u8fd1\u306e\u0053\uff06\u0050\u0035\u0030\u0030"
                + "\u914d\u5f53\u8cb4\u65cf\u6307\u6570\u306e\u30d1\u30d5"
                + "\u30a9\u30fc\u30de\u30f3\u30b9\u306f\u0053\uff06\u0050"
                + "\u0035\u0030\u0030\u6307\u6570\u3092\u4e0a\u56de\u308b";

        PdfReader p = TestResourceUtils.getResourceAsPdfReader(this, inFile);
        try {
            TextExtractionStrategy strat = new SimpleTextExtractionStrategy();
            String extracted = PdfTextExtractor.getTextFromPage(p, FIRST_PAGE, strat);

            // Only the first line is compared. If the extracted text has no line
            // break, compare all of it instead of failing with a
            // StringIndexOutOfBoundsException from substring(0, -1).
            int lineBreak = extracted.indexOf('\n');
            String result = lineBreak < 0 ? extracted : extracted.substring(0, lineBreak);

            // JUnit order is (expected, actual).
            Assert.assertEquals(origText, result);
        } finally {
            p.close();
        }
    }

    /**
     * Checks the X coordinates of the start and end points of the first text
     * baseline reported for a page whose text uses a Type 3 font, with a
     * tolerance of half a unit.
     *
     * @throws Exception if the resource cannot be read or parsed
     */
    @Test
    public void testType3FontWidth() throws Exception {
        String inFile = "type3font_text.pdf";
        LineSegment origLineSegment = new LineSegment(
                new Vector(20.3246f, 769.4974f, 1.0f),
                new Vector(151.22923f, 769.4974f, 1.0f));

        PdfReader reader = TestResourceUtils.getResourceAsPdfReader(this, inFile);
        try {
            TextPositionRenderListener renderListener = new TextPositionRenderListener();
            PdfContentStreamProcessor processor = new PdfContentStreamProcessor(renderListener);

            PdfDictionary pageDic = reader.getPageN(FIRST_PAGE);
            PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);
            processor.processContent(ContentByteUtils.getContentBytesForPage(reader, FIRST_PAGE), resourcesDic);

            LineSegment firstSegment = renderListener.getLineSegments().get(FIRST_ELEMENT_INDEX);

            // JUnit order is (expected, actual, delta); Vector.I1 is the first component.
            Assert.assertEquals(origLineSegment.getStartPoint().get(Vector.I1),
                    firstSegment.getStartPoint().get(Vector.I1), 1 / 2f);
            Assert.assertEquals(origLineSegment.getEndPoint().get(Vector.I1),
                    firstSegment.getEndPoint().get(Vector.I1), 1 / 2f);
        } finally {
            reader.close();
        }
    }

    /**
     * Render listener that records the baseline of every text chunk it is
     * given, in the order received. Images and text-block boundaries are ignored.
     */
    private static class TextPositionRenderListener implements RenderListener {

        private final List<LineSegment> lineSegments = new ArrayList<LineSegment>();

        /**
         * @return the baselines collected so far (the live backing list)
         */
        public List<LineSegment> getLineSegments() {
            return lineSegments;
        }

        public void renderText(TextRenderInfo renderInfo) {
            lineSegments.add(renderInfo.getBaseline());
        }

        public void beginTextBlock() {
        }

        public void endTextBlock() {
        }

        public void renderImage(ImageRenderInfo renderInfo) {
        }
    }

    /**
     * Listener that asserts, for each text chunk, that consecutive
     * per-character baselines are contiguous: the end point of one character
     * must match the start point of the next within 1/72 of a unit.
     */
    private static class CharacterPositionRenderListener implements TextExtractionStrategy {

        public void beginTextBlock() {
        }

        public void renderText(TextRenderInfo renderInfo) {
            List<TextRenderInfo> subs = renderInfo.getCharacterRenderInfos();
            if (subs.isEmpty()) {
                // No characters in this chunk, so there are no adjacent pairs to compare.
                return;
            }

            TextRenderInfo previousCharInfo = subs.get(0);
            for (int i = 1; i < subs.size(); i++) {
                TextRenderInfo charInfo = subs.get(i);
                Vector previousEndPoint = previousCharInfo.getBaseline().getEndPoint();
                Vector currentStartPoint = charInfo.getBaseline().getStartPoint();
                // The character's text is the failure message so the offending glyph is identifiable.
                assertVectorsEqual(charInfo.getText(), previousEndPoint, currentStartPoint);
                previousCharInfo = charInfo;
            }
        }

        /**
         * Asserts that the first two components of both vectors agree within
         * 1/72 of a unit.
         *
         * @param message failure message
         * @param v1 the expected vector
         * @param v2 the actual vector
         */
        private void assertVectorsEqual(String message, Vector v1, Vector v2) {
            Assert.assertEquals(message, v1.get(Vector.I1), v2.get(Vector.I1), 1 / 72f);
            Assert.assertEquals(message, v1.get(Vector.I2), v2.get(Vector.I2), 1 / 72f);
        }

        public void endTextBlock() {
        }

        public void renderImage(ImageRenderInfo renderInfo) {
        }

        /**
         * This listener only performs assertions and accumulates no text.
         *
         * @return always {@code null}
         */
        public String getResultantText() {
            return null;
        }
    }

    /**
     * Builds an in-memory PDF containing one paragraph per given string, with
     * a new page requested after each paragraph.
     *
     * @param pageSize the page size of the document
     * @param text the paragraphs to add, in order
     * @return the bytes of the generated PDF
     * @throws Exception if the document cannot be written
     */
    private byte[] createSimplePdf(Rectangle pageSize, final String... text) throws Exception {
        final ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
        final Document document = new Document(pageSize);
        PdfWriter.getInstance(document, byteStream);
        document.open();
        for (String string : text) {
            document.add(new Paragraph(string));
            document.newPage();
        }
        document.close();
        return byteStream.toByteArray();
    }
}